Ada Example : parsing of html
This example is based on AWS which means Ada Web Server.
AWS is a Ada library for Web App , it is mush more than a Web server.
The following example is http web client parsing all html tags.
It is designed to be customisable and adaptable.
This example shows also usefull usecase of Ada.Containers : Ada.Containers.Vectors and Ada.Containers.Hashed_Maps.
with AWS.Client; with AWS.Response; with Ada.Strings.Fixed; with Ada.Strings.Unbounded; with Ada.Command_Line; with Config; with Html_Processor; procedure Http_Request is package String_U renames Ada.Strings.Unbounded; package CL renames Ada.Command_Line; Res : AWS.Response.Data; Page_Raw_Data : String_U.Unbounded_String; CL_Config : Config.Config_Maps.Map; begin if (CL.Argument_Count > 0) then for I in 2 .. CL.Argument_Count loop Config.Add_Param (CL_Config, CL.Argument (I)); end loop; Res := AWS.Client.Get (URL => CL.Argument (1)); Page_Raw_Data := AWS.Response.Message_Body (Res); Html_Processor.Process_Request (CL_Config, Page_Raw_Data); end if; end Http_Request;
with Ada.Strings.Fixed; with Ada.Strings.Unbounded; with Config; package Html_Processor is package String_U renames Ada.Strings.Unbounded; package String_F renames Ada.Strings.Fixed; type Parser_State is (Init, Html_Tag_Start,Html_Tag_End,Html_Tag,Script_Tag_Start,Script_Tag_End,Script_Tag); procedure Process_Request (S : String); procedure Process_Request (S : String_U.Unbounded_String); procedure Process_Request (C : Config.Config_Maps.Map; S : String); procedure Process_Request (C : Config.Config_Maps.Map; S : String_U.Unbounded_String); end Html_Processor;
with Ada.Text_IO; use Ada.Text_IO; with Ada.Characters.Latin_1; with Stack; package body Html_Processor is AddressStr : constant String_U.Unbounded_String := String_U.To_Unbounded_String ("--address"); TagStr : constant String_U.Unbounded_String := String_U.To_Unbounded_String ("--tag"); CommentStr : constant String_U.Unbounded_String := String_U.To_Unbounded_String ("--comment"); StackStr : constant String_U.Unbounded_String := String_U.To_Unbounded_String ("--stack"); ScriptStr : constant String_U.Unbounded_String := String_U.To_Unbounded_String ("--script"); Cfg : Config.Config_Maps.Map := Config.Config_Maps.Empty_Map; procedure Print_Element (S : String_U.Unbounded_String) is begin Put (Ada.Strings.Unbounded.To_String (S)); end Print_Element; package Tag_Stack is new Stack (Size => 3000, Elem => String_U.Unbounded_String, Print => Print_Element); type Parsing_Contect (Default_Val : Integer) is record Stack : Tag_Stack.Stack; Is_Inside_Tag : Boolean := False; Is_Inside_Script : Boolean := False; Is_Inside_Comment : Boolean := False; Is_Endding_Tag : Boolean := False; State : Parser_State := Init; Last_Tag_Index : Natural := Default_Val; Last_Content_Index : Natural := Default_Val; Tag_Name : String_U.Unbounded_String; end record; function Min (A, B : Natural) return Natural is begin if (A < B) then return A; else return B; end if; end Min; function Is_Empty (S : String) return Boolean is Result : Boolean := True; Char : Character := ' '; begin for I in S'First .. S'Last loop Char := S (I); if Char /= ' ' and Char /= Ada.Characters.Latin_1.CR and Char /= Ada.Characters.Latin_1.LF and Char /= Ada.Characters.Latin_1.HT then Result := False; exit; end if; end loop; return Result; end Is_Empty; function Get_Tag_Name (S : String; I : Natural) return String_U.Unbounded_String is Tag_Name : String_U.Unbounded_String; Index_Tag : Natural := 0; Index_Tag_Sort : Natural := 0; Index_Tag_Full : Natural := 0; begin Index_Tag_Sort := String_F.Index (S (I .. S'Last), ">"); Index_Tag_Full := String_F.Index (S (I .. S'Last), " "); if (Index_Tag_Sort = 0) then Index_Tag_Sort := S'Last; end if; if (Index_Tag_Full = 0) then Index_Tag_Full := S'Last; end if; Index_Tag := Min (Index_Tag_Sort, Index_Tag_Full) - 1; Tag_Name := String_U.To_Unbounded_String (S (I + 1 .. Index_Tag)); return Tag_Name; end Get_Tag_Name; function Get_Full_Tag_Name (S : String; I : Natural) return String_U.Unbounded_String is Full_Tag_Name : String_U.Unbounded_String; Index_Tag : Natural := 0; begin Index_Tag := String_F.Index (S (I .. S'Last), ">"); Full_Tag_Name := String_U.To_Unbounded_String (S (I + 1 .. Index_Tag)); return Full_Tag_Name; end Get_Full_Tag_Name; function Get_End_Tag_Name (Tag : String_U.Unbounded_String) return String_U.Unbounded_String is begin return String_U.Tail (Tag, String_U.Length (Tag) - 1); end Get_End_Tag_Name; function Is_Tag_Open (S : String; I : Natural; Tag : String_U.Unbounded_String) return Boolean is use type String_U.Unbounded_String; Index_Tag : Natural := String_F.Index (S (I .. S'Last), ">"); Is_Tag_Open : Boolean := S (Index_Tag - 1) /= '/'; begin Is_Tag_Open := Is_Tag_Open and String_U.Head (Tag, 1) /= "!"; Is_Tag_Open := Is_Tag_Open and Tag /= "br"; Is_Tag_Open := Is_Tag_Open and Tag /= "hr"; Is_Tag_Open := Is_Tag_Open and Tag /= "img"; Is_Tag_Open := Is_Tag_Open and Tag /= "input"; Is_Tag_Open := Is_Tag_Open and Tag /= "meta"; Is_Tag_Open := Is_Tag_Open and Tag /= "link"; return Is_Tag_Open; end Is_Tag_Open; function Is_End_Tag (Tag : String_U.Unbounded_String) return Boolean is use type String_U.Unbounded_String; Is_End_Tag : Boolean := String_U.Head (Tag, 1) = "/"; begin return Is_End_Tag; end Is_End_Tag; function Is_Script_Tag (Tag : String_U.Unbounded_String) return Boolean is use type String_U.Unbounded_String; Is_Script_Tag : Boolean := Tag = "script" or Tag = "/script"; begin return Is_Script_Tag; end Is_Script_Tag; function Is_Comment_Tag (Tag : String_U.Unbounded_String) return Boolean is use type String_U.Unbounded_String; Is_Comment_Tag : Boolean := String_U.Head (Tag, 3) = "!--"; begin return Is_Comment_Tag; end Is_Comment_Tag; procedure Display_Content (S : in String; I : in Integer; C : in out Parsing_Contect) is begin if ((Cfg.Contains (ScriptStr) and C.Is_Inside_Script) or (not C.Is_Inside_Script)) then if (not C.Is_Inside_Comment and (I > 2)) and then (not Is_Empty (S (C.Last_Content_Index .. I - 1))) then if (Cfg.Contains (StackStr)) then Tag_Stack.Print_All (C.Stack); end if; if (Cfg.Contains (AddressStr)) then Put (" [" & C.Last_Content_Index'Img & " .." & Natural'Image (I) & " ]"); end if; Put_Line (S (C.Last_Content_Index .. I - 1)); end if; end if; end Display_Content; procedure Display_Tag (S : in String; I : in Integer; C : in out Parsing_Contect) is begin if ((Cfg.Contains (TagStr) and not C.Is_Inside_Comment) or (Cfg.Contains (CommentStr) and C.Is_Inside_Comment)) then if ((I > 2)) and then (not Is_Empty (S (C.Last_Tag_Index .. I - 1))) then if (C.Is_Inside_Comment) then Put ("Comment "); else Put ("Tag "); end if; if (Cfg.Contains (AddressStr)) then Put (" [" & C.Last_Tag_Index'Img & " .." & Natural'Image (I) & " ]"); end if; Put_Line (S (C.Last_Tag_Index .. I - 1)); end if; end if; end Display_Tag; procedure Process_Request_Script (S : in String; I : in Integer; C : in out Parsing_Contect) is End_Tag_Name : String_U.Unbounded_String; use type String_U.Unbounded_String; begin if (Is_Script_Tag (C.Tag_Name) and C.Is_Endding_Tag) then End_Tag_Name := Get_End_Tag_Name (C.Tag_Name); Tag_Stack.Pop_Until (C.Stack, End_Tag_Name); Display_Content (S, I, C); C.Last_Tag_Index := I + 1; C.Is_Inside_Script := False; C.Is_Inside_Tag := True; end if; end Process_Request_Script; procedure Process_Request_All_Tag (S : in String; I : in Integer; C : in out Parsing_Contect) is End_Tag_Name : String_U.Unbounded_String; use type String_U.Unbounded_String; begin C.Is_Inside_Script := Is_Script_Tag (C.Tag_Name) and not C.Is_Endding_Tag; if (C.Is_Endding_Tag) then End_Tag_Name := Get_End_Tag_Name (C.Tag_Name); Tag_Stack.Pop_Until (C.Stack, End_Tag_Name); else if (Is_Tag_Open (S, I, C.Tag_Name)) then Tag_Stack.Push (C.Stack, C.Tag_Name); end if; end if; C.Is_Inside_Tag := True; end Process_Request_All_Tag; procedure Process_Request_Internal (S : String) is Char : Character := ' '; C : Parsing_Contect (S'First); use type String_U.Unbounded_String; begin for I in S'Range loop Char := S (I); -- Update Comment meta info if (Char = '>' and C.Is_Inside_Comment and I > 2) and then (S (I - 1) = '-' and S (I - 2) = '-') then Display_Tag (S, I, C); C.Is_Inside_Comment := False; C.Last_Content_Index := I + 1; end if; if (Char = '<') and not C.Is_Inside_Comment then C.Tag_Name := Get_Tag_Name (S, I); C.Is_Inside_Comment := Is_Comment_Tag (C.Tag_Name); if (C.Is_Inside_Comment) then C.Last_Tag_Index := I + 1; Display_Content (S, I, C); end if; end if; -- Update Html meta info if (Char = '>' and C.Is_Inside_Tag) then Display_Tag (S, I, C); C.Is_Inside_Tag := False; C.Last_Content_Index := I + 1; end if; if (Char = '<') and not C.Is_Inside_Comment then C.Tag_Name := Get_Tag_Name (S, I); C.Is_Endding_Tag := Is_End_Tag (C.Tag_Name); if (C.Is_Inside_Script) then Process_Request_Script (S, I, C); else Display_Content (S, I, C); C.Last_Tag_Index := I + 1; Process_Request_All_Tag (S, I, C); end if; end if; end loop; Put_Line ("Result :"); Tag_Stack.Print_All (C.Stack); end Process_Request_Internal; procedure Process_Request (S : String) is begin Put_Line ("Process_Request"); Process_Request_Internal (S); end Process_Request; procedure Process_Request (S : String_U.Unbounded_String) is begin Put_Line ("Process_Request"); Process_Request_Internal (Ada.Strings.Unbounded.To_String (S)); end Process_Request; procedure Process_Request (C : Config.Config_Maps.Map; S : String) is begin Put_Line ("Process_Request"); Cfg := C; Process_Request_Internal (S); end Process_Request; procedure Process_Request (C : Config.Config_Maps.Map; S : Ada.Strings.Unbounded.Unbounded_String) is begin Put_Line ("Process_Request"); Cfg := C; Process_Request_Internal (Ada.Strings.Unbounded.To_String (S)); end Process_Request; end Html_Processor;
with Ada.Strings.Unbounded; with Ada.Strings.Fixed; with Ada.Containers.Hashed_Maps; with Ada.Strings.Unbounded.Hash; with Ada.Strings.Fixed.Hash; package Config is package String_U renames Ada.Strings.Unbounded; package String_F renames Ada.Strings.Fixed; use type String_U.Unbounded_String; function Hashed (X : String_U.Unbounded_String) return Ada.Containers.Hash_Type renames Ada.Strings.Unbounded.Hash; package Config_Maps is new Ada.Containers.Hashed_Maps ( Key_Type => String_U.Unbounded_String, Element_Type => String_U.Unbounded_String, Hash => Hashed, Equivalent_Keys => "="); procedure Add_Param (Cfg : in out Config.Config_Maps.Map; Arg : String); end Config;
with Ada.Containers; with Ada.Text_IO; package body Config is procedure Get_Key_Value (S : in String; key, value : out String_U.Unbounded_String) is Index : Natural; begin Index := String_F.Index (S, "="); if (Index = 0) then key := String_U.To_Unbounded_String (S); value := String_U.To_Unbounded_String (""); else key := String_U.To_Unbounded_String (S (S'First .. Index - 1)); value := String_U.To_Unbounded_String (S (Index + 1 .. S'Last)); end if; end Get_Key_Value; procedure Add_Param (Cfg : in out Config.Config_Maps.Map; Arg : in String) is key, value : String_U.Unbounded_String; begin Get_Key_Value (Arg, key, value); Ada.Text_IO.Put_Line ("key" & String_U.To_String (key)); Config.Config_Maps.Insert (Container => Cfg, Key => key, New_Item => value); end Add_Param; end Config;
with Ada.Containers.Vectors; generic Size : Integer; type Elem is private; with procedure Print (E : in Elem); package Stack is type Stack is limited private; subtype Count_Type is Integer range 0 .. Size; subtype Index_Type is Count_Type range 1 .. Size; package Stack_Vectors is new Ada.Containers.Vectors (Index_Type => Index_Type, Element_Type => Elem); procedure Push (S : in out Stack; E : in Elem); procedure Pop (S : in out Stack; E : out Elem); procedure Pop (S : in out Stack); procedure Pop_Until (S : in out Stack; E : in Elem); procedure Print_All (S : in Stack); function Length (S : in Stack) return Count_Type; OVERFLOW, UNDERFLOW : exception; private type Stack is record Len : Count_Type := 0; V : Stack_Vectors.Vector; end record; end Stack;
with Ada.Text_IO; use Ada.Text_IO; package body Stack is procedure Push (S : in out Stack; E : in Elem) is begin if S.Len = Size then raise OVERFLOW; end if; S.Len := S.Len + 1; Stack_Vectors.Append (S.V, E); end Push; procedure Pop (S : in out Stack; E : out Elem) is begin if S.Len = 0 then raise UNDERFLOW; end if; E := Stack_Vectors.Last_Element (S.V); Stack_Vectors.Delete_Last (S.V); S.Len := S.Len - 1; end Pop; -- pop until E is found procedure Pop_Until (S : in out Stack; E : in Elem) is Element : Elem; Nb_Deleted_Item : Ada.Containers.Count_Type := 0; Length : Count_Type := S.Len; Found : Boolean := False; End_Of_Stack : Boolean := False; Pop_Cursor : Stack_Vectors.Cursor := Stack_Vectors.Last (S.V); use Ada.Containers; begin while not Found and not End_Of_Stack loop if Length = 0 then End_Of_Stack := True; else Nb_Deleted_Item := Nb_Deleted_Item + 1; Length := Length - 1; Element := Stack_Vectors.Element (Pop_Cursor); if (E = Element) then Found := True; end if; Pop_Cursor := Stack_Vectors.Previous (Pop_Cursor); end if; end loop; if (Found) then Stack_Vectors.Delete_Last (S.V, Nb_Deleted_Item); S.Len := S.Len - Count_Type (Nb_Deleted_Item); else Put ("Pop_Until Error:"); Print (E); Put (" Stack :"); Print_All (S => S); Put_Line (""); end if; end Pop_Until; procedure Pop (S : in out Stack) is begin if S.Len = 0 then raise UNDERFLOW; end if; Stack_Vectors.Delete_Last (S.V); S.Len := S.Len - 1; end Pop; procedure Print_All (S : in Stack) is begin if (S.Len > 0) then for I in 1 .. S.Len - 1 loop Print (Stack_Vectors.Element (S.V, I)); Put (":"); end loop; Print (Stack_Vectors.Element (S.V, S.Len)); else Put ("Empty Stack"); end if; end Print_All; function Length (S : in Stack) return Count_Type is begin return S.Len; end Length; end Stack;