Topics
:Overview
Enumerations
Data Structures
Functions
HTML Tag Handlers
The vbsHTML class is a base class used to parse HTML documents. It is supplied with the database library to extract data from Web pages. The vbsHTML class works through multiple inheritance. It includes functions to load and parse HTML files. HTML tags are handled through the use of virtual tag handlers. Derived classes are responsible for processing HTML tags and any associated attributes by overriding the appropriate tag handler.
// The following list of HTML tags is a combination of HTML // 2.0, 3.0, 3.2 tags supported by Netscape's Navigator // web browser, Microsoft's Internet Explorer web browser, // and standards defined by the World Wide Web Consortium. // This list was taken from Willcam's Comprehensive HTML // Cross Reference at: // http://www.willcam.com/cmat/html/crossref.html enum { // HTML tags and modifiers ID enumeration vbsHTML::vbs_invalid_tag = 0, // Invalid tag specified vbsHTML::vbs_unknown_tag, // Unknown tag specified vbsHTML::vbs_special_tag, // Unknown special tags starting with // an ampersand ending in a semicolon // &xxxx; // Tags and format specifiers with special meaning vbsHTML::vbs_comment_tag, // comment vbsHTML::vbs_less_then, // Less than sign "<" vbsHTML::vbs_greater_then, // Greater than sign ">" vbsHTML::vbs_ampersand, // Ampersand "&" vbsHTML::vbs_nb_space, // Non-breaking space "&nbsp;" vbsHTML::vbs_quote, // Quotation mark "&quot;" vbsHTML::vbs_ex_acsii_set, // Extended ASCII character set // HTML tag codes vbsHTML::vbs_a_tag, // anchor vbsHTML::vbs_abbrev_tag, // abbreviation vbsHTML::vbs_acronym_tag, // acronym vbsHTML::vbs_address_tag, // address vbsHTML::vbs_applet_tag, // java applet vbsHTML::vbs_area_tag, // area vbsHTML::vbs_au_tag, // author vbsHTML::vbs_author_tag, // author vbsHTML::vbs_b_tag, // bold vbsHTML::vbs_banner_tag, // banner vbsHTML::vbs_base_tag, // base vbsHTML::vbs_basefont_tag, // base font vbsHTML::vbs_bgsound_tag, // background sound vbsHTML::vbs_big_tag, // big text vbsHTML::vbs_blink_tag, // blink vbsHTML::vbs_blockquote_tag, // block quote vbsHTML::vbs_bq_tag, // block quote vbsHTML::vbs_body_tag, // body vbsHTML::vbs_br_tag, // line break vbsHTML::vbs_caption_tag, // caption vbsHTML::vbs_center_tag, // center vbsHTML::vbs_cite_tag, // citation vbsHTML::vbs_code_tag, // code vbsHTML::vbs_col_tag, // table column vbsHTML::vbs_colgroup_tag, // table column group vbsHTML::vbs_credit_tag, // credit vbsHTML::vbs_del_tag, // deleted 
text vbsHTML::vbs_dfn_tag, // definition vbsHTML::vbs_dir_tag, // directory list vbsHTML::vbs_div_tag, // division vbsHTML::vbs_dl_tag, // definition list vbsHTML::vbs_dt_tag, // definition term vbsHTML::vbs_dd_tag, // definition definition vbsHTML::vbs_em_tag, // emphasized vbsHTML::vbs_embed_tag, // embed vbsHTML::vbs_fig_tag, // figure vbsHTML::vbs_fn_tag, // footnote vbsHTML::vbs_font_tag, // font vbsHTML::vbs_form_tag, // form vbsHTML::vbs_frame_tag, // frame vbsHTML::vbs_frameset_tag, // frame set vbsHTML::vbs_h1_tag, // heading 1 vbsHTML::vbs_h2_tag, // heading 2 vbsHTML::vbs_h3_tag, // heading 3 vbsHTML::vbs_h4_tag, // heading 4 vbsHTML::vbs_h5_tag, // heading 5 vbsHTML::vbs_h6_tag, // heading 6 vbsHTML::vbs_head_tag, // head vbsHTML::vbs_hr_tag, // horizontal rule vbsHTML::vbs_html_tag, // html vbsHTML::vbs_i_tag, // italic vbsHTML::vbs_iframe_tag, // frame - floating vbsHTML::vbs_img_tag, // inline image vbsHTML::vbs_input_tag, // form input vbsHTML::vbs_ins_tag, // inserted text vbsHTML::vbs_isindex_tag, // is index vbsHTML::vbs_kbd_tag, // keyboard vbsHTML::vbs_lang_tag, // language vbsHTML::vbs_lh_tag, // list heading vbsHTML::vbs_li_tag, // list item vbsHTML::vbs_link_tag, // link vbsHTML::vbs_listing_tag, // listing vbsHTML::vbs_map_tag, // map vbsHTML::vbs_marquee_tag, // marquee vbsHTML::vbs_math_tag, // math vbsHTML::vbs_menu_tag, // menu list vbsHTML::vbs_meta_tag, // meta vbsHTML::vbs_multicol_tag, // multi column text vbsHTML::vbs_nobr_tag, // no break vbsHTML::vbs_noframes_tag, // no frames vbsHTML::vbs_note_tag, // note vbsHTML::vbs_ol_tag, // ordered list vbsHTML::vbs_overlay_tag, // overlay vbsHTML::vbs_p_tag, // paragraph vbsHTML::vbs_param_tag, // parameters vbsHTML::vbs_person_tag, // person vbsHTML::vbs_plaintext_tag, // plain text vbsHTML::vbs_pre_tag, // preformatted text vbsHTML::vbs_q_tag, // quote vbsHTML::vbs_range_tag, // range vbsHTML::vbs_samp_tag, // sample vbsHTML::vbs_script_tag, // script vbsHTML::vbs_select_tag, // form 
select vbsHTML::vbs_small_tag, // small text vbsHTML::vbs_spacer_tag, // white space vbsHTML::vbs_spot_tag, // spot vbsHTML::vbs_strike_tag, // strikethrough vbsHTML::vbs_strong_tag, // strong vbsHTML::vbs_sub_tag, // subscript vbsHTML::vbs_sup_tag, // superscript vbsHTML::vbs_tab_tag, // horizontal tab vbsHTML::vbs_table_tag, // table vbsHTML::vbs_tbody_tag, // table body vbsHTML::vbs_td_tag, // table data vbsHTML::vbs_textarea_tag, // form text area vbsHTML::vbs_textflow_tag, // java applet textflow vbsHTML::vbs_tfoot_tag, // table footer vbsHTML::vbs_th_tag, // table header vbsHTML::vbs_thead_tag, // table head vbsHTML::vbs_title_tag, // title vbsHTML::vbs_tr_tag, // table row vbsHTML::vbs_tt_tag, // teletype vbsHTML::vbs_u_tag, // underlined vbsHTML::vbs_ul_tag, // unordered list vbsHTML::vbs_var_tag, // variable vbsHTML::vbs_wbr_tag, // word break vbsHTML::vbs_xmp_tag // example };
Data structure used to store the file position of an HTML tag, the tag itself, its attributes, and its instructions.
struct vbsHTMLTagInfo { // File information df_StreamPos start_tag; // This tag's starting position in the file df_StreamPos end_tag; // This tag's ending position in the file unsigned tag_length; // The complete length of this tag "< ---- >" // Tag information int tag_id; // Numerical value used to identify supported tags vbString tag_info; // Complete tag from opening to closing bracket vbString tag; // HTML tag vbString attr; // HTML tag attributes // Tag instructions int start_instruction; // True if start of tag instruction "<" int end_instruction; // True if end of a tag instruction "/x>" int has_attributes; // True if this tag has associated attributes };
vbsHTML::vbsHTML()
vbsHTML::~vbsHTML()
vbsHTML::ClearTagList()
vbsHTML::CloseFile()
vbsHTML::CollectHTMLTags()
vbsHTML::Copy()
vbsHTML::GetTag()
vbsHTML::GetTagID()
vbsHTML::GetTagList()
vbsHTML::HandleHTMLTag()
vbsHTML::LoadHTMLFile()
vbsHTML::LoadMemoryBuffer()
vbsHTML::NumProcessed()
vbsHTML::NumTags()
vbsHTML::ParseHTMLTagInfo()
vbsHTML::ProcessHTMLTags()
vbsHTML::vbsHTML()
- Default class constructor.
vbsHTML::vbsHTML(const vbsHTML &ob)
- Class copy constructor.
vbsHTML::~vbsHTML()
- Class destructor.
vbsHTML::ClearTagList()
- Public member function used to clear the tag list.
vbsHTML::CloseFile()
- Public member function used to close the open HTML file after a load operation.
int vbsHTML::CollectHTMLTags()
- Internal processing function used to collect all the HTML tags in a previously opened file. Returns zero if no file errors occur, or a non-zero value corresponding to a DiskFileB error code if an error occurs.
int vbsHTML::CollectHTMLTags(const MemoryBuffer &membuf)
- Internal processing function used to collect all the HTML tags from a previously loaded MemoryBuffer object. Returns zero if no errors occur, or a non-zero value to indicate a failure.
void vbsHTML::Copy(const vbsHTML &ob)
- Internal processing function used to copy vbsHTML objects.
char *vbsHTML::GetTag(int tag_id)
- Public member function that returns a null-terminated string based on the value of the tag ID number. The "tag_id" variable must equal one of the integer constants defined in the tag ID enumeration.
int vbsHTML::GetTagID(const vbString &tag)
- Public member function that returns a numerical value defined in the tag ID enumeration that represents the specified tag.
vbDLList
void vbsHTML::HandleHTMLTag(int tag_id)
- Internal processing function used to execute the derived class version of a specific tag handler.
int vbsHTML::LoadHTMLFile(const char *fname)
- Public member function used to open the specified HTML file and process all the tags collected from the file. Returns zero if no disk file errors occur, or a non-zero value corresponding to a DiskFileB error code if an error occurs.
int vbsHTML::LoadMemoryBuffer(const MemoryBuffer &membuf)
- Public member function used to process all the tags stored in a MemoryBuffer object. Returns zero if no errors occur, or a non-zero value to indicate a failure.
unsigned vbsHTML::NumProcessed()
- Public member function used to retrieve the total number of tags processed.
vbsHTML::NumTags()
- Public member function used to retrieve the total number of tags collected.
void vbsHTML::ParseHTMLTagInfo(vbsHTMLTagInfo &t)
- Public member function used to parse the specific tag information based on the string contained in the vbsHTMLTagInfo::tag_info member.
int vbsHTML::ProcessHTMLTags()
- Internal processing function used to read and process all the tags in a previously opened file. Returns zero if no disk file errors occur, or a non-zero value corresponding to a DiskFileB error code if an error occurs.
int vbsHTML::ProcessHTMLTags(const MemoryBuffer &membuf)
- Internal processing function used to read and process all the tags in a MemoryBuffer object. Returns zero if no errors occur, or a non-zero value to indicate a failure.
Derived class interface used to process tags.
void vbsHTML::Handle_INVALID_Tag() { // Override to handle INVALID tags } void vbsHTML::Handle_UNKNOWN_Tag() { // Override to handle UNKNOWN tags } void vbsHTML::Handle_UNKNOWN_SPECIAL_Tag() { // Override to handle unknown special tags starting // with an ampersand ending in a semicolon &xxxx; } void vbsHTML::Handle_COMMENT_Tag() { // Override to handle COMMENT tags } void vbsHTML::Handle_LESS_THEN_Tag() { // Override to handle a less than sign "<" } void vbsHTML::Handle_GREATER_THEN_Tag() { // Override to handle a greater than sign ">" } void vbsHTML::Handle_AMPERSAND_Tag() { // Override to handle an ampersand "&" } void vbsHTML::Handle_NB_SPACE_Tag() { // Override to handle a non-breaking space "&nbsp;" } void vbsHTML::Handle_QUOTE_Tag() { // Override to handle a quotation mark "&quot;" } void vbsHTML::Handle_EX_ASCII_Tag() { // Override to handle the extended ASCII character set "&#" } void vbsHTML::Handle_A_Tag() { // Override to handle ANCHOR tags } void vbsHTML::Handle_ABBREV_Tag() { // Override to handle ABBREVIATION tags } void vbsHTML::Handle_ACRONYM_Tag() { // Override to handle ACRONYM tags } void vbsHTML::Handle_ADDRESS_Tag() { // Override to handle ADDRESS tags } void vbsHTML::Handle_APPLET_Tag() { // Override to handle JAVA APPLET tag } void vbsHTML::Handle_AREA_Tag() { // Override to handle AREA tags } void vbsHTML::Handle_AU_Tag() { // Override to handle AUTHOR tags } void vbsHTML::Handle_AUTHOR_Tag() { // Override to handle AUTHOR tags } void vbsHTML::Handle_B_Tag() { // Override to handle BOLD tags } void vbsHTML::Handle_BANNER_Tag() { // Override to handle BANNER tags } void vbsHTML::Handle_BASE_Tag() { // Override to handle BASE tags } void vbsHTML::Handle_BASEFONT_Tag() { // Override to handle BASE FONT } void vbsHTML::Handle_BGSOUND_Tag() { // Override to handle BACKGROUND SOUND } void vbsHTML::Handle_BIG_Tag() { // Override to handle BIG text } void vbsHTML::Handle_BLINK_Tag() { // Override to handle BLINK tags } void vbsHTML::Handle_BLOCKQUOTE_Tag() { 
// Override to handle BLOCK QUOTE tags } void vbsHTML::Handle_BQ_Tag() { // Override to handle BLOCK QUOTE tags } void vbsHTML::Handle_BODY_Tag() { // Override to handle BODY tags } void vbsHTML::Handle_BR_Tag() { // Override to handle LINE BREAK tags } void vbsHTML::Handle_CAPTION_Tag() { // Override to handle CAPTION tags } void vbsHTML::Handle_CENTER_Tag() { // Override to handle CENTER tags } void vbsHTML::Handle_CITE_Tag() { // Override to handle CITATION tags } void vbsHTML::Handle_CODE_Tag() { // Override to handle CODE tags } void vbsHTML::Handle_COL_Tag() { // Override to handle TABLE Cols tags } void vbsHTML::Handle_COLGROUP_Tag() { // Override to handle TABLE Cols tags } void vbsHTML::Handle_CREDIT_Tag() { // Override to handle CREDIT tags } void vbsHTML::Handle_DEL_Tag() { // Override to handle DELETED text tags } void vbsHTML::Handle_DFN_Tag() { // Override to handle DEFINITION tags } void vbsHTML::Handle_DIR_Tag() { // Override to handle DIRECTORY list tags } void vbsHTML::Handle_DIV_Tag() { // Override to handle DIVISION tags } void vbsHTML::Handle_DL_Tag() { // Override to handle DEFINITION list tags } void vbsHTML::Handle_DT_Tag() { // Override to handle DEFINITION term tags } void vbsHTML::Handle_DD_Tag() { // Override to handle DEFINITION tags } void vbsHTML::Handle_EM_Tag() { // Override to handle EMPHASIZED tags } void vbsHTML::Handle_EMBED_Tag() { // Override to handle EMBED tags } void vbsHTML::Handle_FIG_Tag() { // Override to handle FIGURE tags } void vbsHTML::Handle_FN_Tag() { // Override to handle FOOTNOTE tags } void vbsHTML::Handle_FONT_Tag() { // Override to handle FONT tags } void vbsHTML::Handle_FORM_Tag() { // Override to handle FORM tags } void vbsHTML::Handle_FRAME_Tag() { // Override to handle FRAME tags } void vbsHTML::Handle_FRAMESET_Tag() { // Override to handle FRAME sets } void vbsHTML::Handle_H1_Tag() { // Override to handle HEADING 1 tags } void vbsHTML::Handle_H2_Tag() { // Override to handle HEADING 2 tags } void 
vbsHTML::Handle_H3_Tag() { // Override to handle HEADING 3 tags } void vbsHTML::Handle_H4_Tag() { // Override to handle HEADING 4 tags } void vbsHTML::Handle_H5_Tag() { // Override to handle HEADING 5 tags } void vbsHTML::Handle_H6_Tag() { // Override to handle HEADING 6 tags } void vbsHTML::Handle_HEAD_Tag() { // Override to handle HEAD tags } void vbsHTML::Handle_HR_Tag() { // Override to handle HORIZONTAL rules } void vbsHTML::Handle_HTML_Tag() { // Override to handle HTML tags } void vbsHTML::Handle_I_Tag() { // Override to handle ITALIC tags } void vbsHTML::Handle_IFRAME_Tag() { // Override to handle FRAME - Floating tag } void vbsHTML::Handle_IMG_Tag() { // Override to handle INLINE images } void vbsHTML::Handle_INPUT_Tag() { // Override to handle FORM input tags } void vbsHTML::Handle_INS_Tag() { // Override to handle INSERTED text } void vbsHTML::Handle_ISINDEX_Tag() { // Override to handle ISINDEX tag } void vbsHTML::Handle_KBD_Tag() { // Override to handle KEYBOARD tags } void vbsHTML::Handle_LANG_Tag() { // Override to handle LANGUAGE tags } void vbsHTML::Handle_LH_Tag() { // Override to handle LIST header tags } void vbsHTML::Handle_LI_Tag() { // Override to handle LIST item tags } void vbsHTML::Handle_LINK_Tag() { // Override to handle LINK tags } void vbsHTML::Handle_LISTING_Tag() { // Override to handle LISTING tags } void vbsHTML::Handle_MAP_Tag() { // Override to handle MAP tags } void vbsHTML::Handle_MARQUEE_Tag() { // Override to handle MARQUEE tags } void vbsHTML::Handle_MATH_Tag() { // Override to handle MATH tags } void vbsHTML::Handle_MENU_Tag() { // Override to handle MENU list tags } void vbsHTML::Handle_META_Tag() { // Override to handle META tags } void vbsHTML::Handle_MULTICOL_Tag() { // Override to handle MULTI COLUMN tags } void vbsHTML::Handle_NOBR_Tag() { // Override to handle NO BREAK tags } void vbsHTML::Handle_NOFRAMES_Tag() { // Override to handle NO FRAMES tags } void vbsHTML::Handle_NOTE_Tag() { // Override to handle NOTE tags 
} void vbsHTML::Handle_OL_Tag() { // Override to handle ORDERED list tags } void vbsHTML::Handle_OVERLAY_Tag() { // Override to handle OVERLAY tags } void vbsHTML::Handle_P_Tag() { // Override to handle PARAGRAPH tags } void vbsHTML::Handle_PARAM_Tag() { // Override to handle PARAMETERS tags } void vbsHTML::Handle_PERSON_Tag() { // Override to handle PERSON tags } void vbsHTML::Handle_PLAINTEXT_Tag() { // Override to handle PLAIN text tags } void vbsHTML::Handle_PRE_Tag() { // Override to handle PREFORMATTED text tags } void vbsHTML::Handle_Q_Tag() { // Override to handle QUOTE tags } void vbsHTML::Handle_RANGE_Tag() { // Override to handle RANGE tags } void vbsHTML::Handle_SAMP_Tag() { // Override to handle SAMPLE tags } void vbsHTML::Handle_SCRIPT_Tag() { // Override to handle SCRIPT tags } void vbsHTML::Handle_SELECT_Tag() { // Override to handle FORM SELECT tags } void vbsHTML::Handle_SMALL_Tag() { // Override to handle SMALL text tags } void vbsHTML::Handle_SPACER_Tag() { // Override to handle WHITE SPACE tags } void vbsHTML::Handle_SPOT_Tag() { // Override to handle SPOT tags } void vbsHTML::Handle_STRIKE_Tag() { // Override to handle STRIKETHROUGH tags } void vbsHTML::Handle_STRONG_Tag() { // Override to handle STRONG tags } void vbsHTML::Handle_SUB_Tag() { // Override to handle SUBSCRIPT tags } void vbsHTML::Handle_SUP_Tag() { // Override to handle SUPERSCRIPT tags } void vbsHTML::Handle_TAB_Tag() { // Override to handle HORIZONTAL TABS tags } void vbsHTML::Handle_TABLE_Tag() { // Override to handle TABLE tags } void vbsHTML::Handle_TBODY_Tag() { // Override to handle TABLE body tags } void vbsHTML::Handle_TD_Tag() { // Override to handle TABLE data tags } void vbsHTML::Handle_TEXTAREA_Tag() { // Override to handle FORM text area tags } void vbsHTML::Handle_TEXTFLOW_Tag() { // Override to handle JAVA applet textflow } void vbsHTML::Handle_TFOOT_Tag() { // Override to handle TABLE footer tags } void vbsHTML::Handle_TH_Tag() { // Override to handle TABLE header tags } 
void vbsHTML::Handle_THEAD_Tag() { // Override to handle TABLE head tag } void vbsHTML::Handle_TITLE_Tag() { // Override to handle TITLE tags } void vbsHTML::Handle_TR_Tag() { // Override to handle TABLE row tags } void vbsHTML::Handle_TT_Tag() { // Override to handle TELETYPE tags } void vbsHTML::Handle_U_Tag() { // Override to handle UNDERLINED tags } void vbsHTML::Handle_UL_Tag() { // Override to handle UNORDERED list tags } void vbsHTML::Handle_VAR_Tag() { // Override to handle VARIABLE tags } void vbsHTML::Handle_WBR_Tag() { // Override to handle WORD BREAK tags } void vbsHTML::Handle_XMP_Tag() { // Override to handle EXAMPLE tags }
End Of Document |