Ada Example : parsing of html

This example is based on AWS which means Ada Web Server.
AWS is a Ada library for Web App , it is mush more than a Web server.


The following example is http web client parsing all html tags. It is designed to be customisable and adaptable.

This example shows also usefull usecase of Ada.Containers : Ada.Containers.Vectors and Ada.Containers.Hashed_Maps.


with AWS.Client;
with AWS.Response;
with Ada.Strings.Fixed;
with Ada.Strings.Unbounded;
with Ada.Command_Line;
 
with Config;
with Html_Processor;
 
procedure Http_Request is
 
   package String_U renames Ada.Strings.Unbounded;
   package CL renames Ada.Command_Line;
   Res           : AWS.Response.Data;
   Page_Raw_Data : String_U.Unbounded_String;
 
   CL_Config : Config.Config_Maps.Map;
 
begin
 
   if (CL.Argument_Count > 0) then
 
      for I in 2 .. CL.Argument_Count loop
 
         Config.Add_Param (CL_Config, CL.Argument (I));
 
      end loop;
 
      Res           := AWS.Client.Get (URL => CL.Argument (1));
      Page_Raw_Data := AWS.Response.Message_Body (Res);
      Html_Processor.Process_Request (CL_Config, Page_Raw_Data);
   end if;
 
end Http_Request;
 
with Ada.Strings.Fixed;
with Ada.Strings.Unbounded;
with Config;
 
package Html_Processor is
 
   package String_U renames Ada.Strings.Unbounded;
   package String_F renames Ada.Strings.Fixed;
 
 
   type Parser_State  is (Init, Html_Tag_Start,Html_Tag_End,Html_Tag,Script_Tag_Start,Script_Tag_End,Script_Tag);
 
   procedure Process_Request (S : String);
   procedure Process_Request (S : String_U.Unbounded_String);
 
   procedure Process_Request (C : Config.Config_Maps.Map; S : String);
   procedure Process_Request (C : Config.Config_Maps.Map; S : String_U.Unbounded_String);
 
end Html_Processor;
 
with Ada.Text_IO; use Ada.Text_IO;
with Ada.Characters.Latin_1;
 
with Stack;
 
package body Html_Processor is
 
   AddressStr : constant String_U.Unbounded_String := String_U.To_Unbounded_String ("--address");
   TagStr     : constant String_U.Unbounded_String := String_U.To_Unbounded_String ("--tag");
   CommentStr : constant String_U.Unbounded_String := String_U.To_Unbounded_String ("--comment");
   StackStr   : constant String_U.Unbounded_String := String_U.To_Unbounded_String ("--stack");
   ScriptStr  : constant String_U.Unbounded_String := String_U.To_Unbounded_String ("--script");
   Cfg        : Config.Config_Maps.Map             := Config.Config_Maps.Empty_Map;
 
   procedure Print_Element (S : String_U.Unbounded_String) is
   begin
      Put (Ada.Strings.Unbounded.To_String (S));
   end Print_Element;
 
   package Tag_Stack is new Stack (Size => 3000, Elem => String_U.Unbounded_String, Print => Print_Element);
 
   type Parsing_Contect (Default_Val : Integer) is record
      Stack             : Tag_Stack.Stack;
      Is_Inside_Tag     : Boolean := False;
      Is_Inside_Script  : Boolean := False;
      Is_Inside_Comment : Boolean := False;
      Is_Endding_Tag    : Boolean := False;
 
      State              : Parser_State := Init;
      Last_Tag_Index     : Natural      := Default_Val;
      Last_Content_Index : Natural      := Default_Val;
      Tag_Name           : String_U.Unbounded_String;
   end record;
 
   function Min (A, B : Natural) return Natural is
   begin
      if (A < B) then
         return A;
      else
         return B;
      end if;
   end Min;
 
   function Is_Empty (S : String) return Boolean is
      Result : Boolean   := True;
      Char   : Character := ' ';
   begin
      for I in S'First .. S'Last loop
         Char := S (I);
         if Char /= ' ' and
            Char /= Ada.Characters.Latin_1.CR and
            Char /= Ada.Characters.Latin_1.LF and
            Char /= Ada.Characters.Latin_1.HT
         then
            Result := False;
            exit;
         end if;
      end loop;
 
      return Result;
   end Is_Empty;
 
   function Get_Tag_Name (S : String; I : Natural) return String_U.Unbounded_String is
      Tag_Name       : String_U.Unbounded_String;
      Index_Tag      : Natural := 0;
      Index_Tag_Sort : Natural := 0;
      Index_Tag_Full : Natural := 0;
   begin
      Index_Tag_Sort := String_F.Index (S (I .. S'Last), ">");
      Index_Tag_Full := String_F.Index (S (I .. S'Last), " ");
 
      if (Index_Tag_Sort = 0) then
         Index_Tag_Sort := S'Last;
      end if;
      if (Index_Tag_Full = 0) then
         Index_Tag_Full := S'Last;
      end if;
 
      Index_Tag := Min (Index_Tag_Sort, Index_Tag_Full) - 1;
      Tag_Name  := String_U.To_Unbounded_String (S (I + 1 .. Index_Tag));
 
      return Tag_Name;
   end Get_Tag_Name;
 
   function Get_Full_Tag_Name (S : String; I : Natural) return String_U.Unbounded_String is
      Full_Tag_Name : String_U.Unbounded_String;
      Index_Tag     : Natural := 0;
 
   begin
      Index_Tag := String_F.Index (S (I .. S'Last), ">");
 
      Full_Tag_Name := String_U.To_Unbounded_String (S (I + 1 .. Index_Tag));
 
      return Full_Tag_Name;
   end Get_Full_Tag_Name;
 
   function Get_End_Tag_Name (Tag : String_U.Unbounded_String) return String_U.Unbounded_String is
   begin
      return String_U.Tail (Tag, String_U.Length (Tag) - 1);
   end Get_End_Tag_Name;
 
   function Is_Tag_Open
     (S    : String;
      I    : Natural;
      Tag  : String_U.Unbounded_String)
      return Boolean
   is
      use type String_U.Unbounded_String;
      Index_Tag   : Natural := String_F.Index (S (I .. S'Last), ">");
      Is_Tag_Open : Boolean := S (Index_Tag - 1) /= '/';
   begin
      Is_Tag_Open := Is_Tag_Open and String_U.Head (Tag, 1) /= "!";
      Is_Tag_Open := Is_Tag_Open and Tag /= "br";
      Is_Tag_Open := Is_Tag_Open and Tag /= "hr";
      Is_Tag_Open := Is_Tag_Open and Tag /= "img";
      Is_Tag_Open := Is_Tag_Open and Tag /= "input";
      Is_Tag_Open := Is_Tag_Open and Tag /= "meta";
      Is_Tag_Open := Is_Tag_Open and Tag /= "link";
      return Is_Tag_Open;
   end Is_Tag_Open;
 
   function Is_End_Tag (Tag : String_U.Unbounded_String) return Boolean is
      use type String_U.Unbounded_String;
      Is_End_Tag : Boolean := String_U.Head (Tag, 1) = "/";
   begin
 
      return Is_End_Tag;
   end Is_End_Tag;
 
   function Is_Script_Tag (Tag : String_U.Unbounded_String) return Boolean is
      use type String_U.Unbounded_String;
      Is_Script_Tag : Boolean := Tag = "script" or Tag = "/script";
   begin
 
      return Is_Script_Tag;
   end Is_Script_Tag;
 
   function Is_Comment_Tag (Tag : String_U.Unbounded_String) return Boolean is
      use type String_U.Unbounded_String;
      Is_Comment_Tag : Boolean := String_U.Head (Tag, 3) = "!--";
   begin
 
      return Is_Comment_Tag;
   end Is_Comment_Tag;
 
   procedure Display_Content (S : in String; I : in Integer; C : in out Parsing_Contect) is
   begin
 
      if ((Cfg.Contains (ScriptStr) and C.Is_Inside_Script) or  (not C.Is_Inside_Script)) then
         if (not C.Is_Inside_Comment and (I > 2))
           and then (not Is_Empty (S (C.Last_Content_Index .. I - 1)))
         then
            if (Cfg.Contains (StackStr)) then
               Tag_Stack.Print_All (C.Stack);
            end if;
            if (Cfg.Contains (AddressStr)) then
               Put (" [" & C.Last_Content_Index'Img & " .." & Natural'Image (I) & " ]");
            end if;
            Put_Line (S (C.Last_Content_Index .. I - 1));
         end if;
      end if;
   end Display_Content;
 
   procedure Display_Tag (S : in String; I : in Integer; C : in out Parsing_Contect) is
   begin
      if ((Cfg.Contains (TagStr) and not C.Is_Inside_Comment) or
          (Cfg.Contains (CommentStr) and C.Is_Inside_Comment))
      then
         if ((I > 2)) and then (not Is_Empty (S (C.Last_Tag_Index .. I - 1))) then
            if (C.Is_Inside_Comment) then
               Put ("Comment ");
            else
               Put ("Tag ");
            end if;
            if (Cfg.Contains (AddressStr)) then
               Put (" [" & C.Last_Tag_Index'Img & " .." & Natural'Image (I) & " ]");
            end if;
            Put_Line (S (C.Last_Tag_Index .. I - 1));
         end if;
      end if;
   end Display_Tag;
 
   procedure Process_Request_Script (S : in String; I : in Integer; C : in out Parsing_Contect) is
      End_Tag_Name : String_U.Unbounded_String;
      use type String_U.Unbounded_String;
   begin
      if (Is_Script_Tag (C.Tag_Name) and C.Is_Endding_Tag) then
         End_Tag_Name := Get_End_Tag_Name (C.Tag_Name);
         Tag_Stack.Pop_Until (C.Stack, End_Tag_Name);
         Display_Content (S, I, C);
         C.Last_Tag_Index   := I + 1;
         C.Is_Inside_Script := False;
         C.Is_Inside_Tag    := True;
      end if;
   end Process_Request_Script;
 
   procedure Process_Request_All_Tag (S : in String; I : in Integer; C : in out Parsing_Contect) is
      End_Tag_Name : String_U.Unbounded_String;
      use type String_U.Unbounded_String;
 
   begin
 
      C.Is_Inside_Script := Is_Script_Tag (C.Tag_Name) and not C.Is_Endding_Tag;
 
      if (C.Is_Endding_Tag) then
 
         End_Tag_Name := Get_End_Tag_Name (C.Tag_Name);
 
         Tag_Stack.Pop_Until (C.Stack, End_Tag_Name);
      else
         if (Is_Tag_Open (S, I, C.Tag_Name)) then
            Tag_Stack.Push (C.Stack, C.Tag_Name);
         end if;
      end if;
      C.Is_Inside_Tag := True;
   end Process_Request_All_Tag;
 
   procedure Process_Request_Internal (S : String) is
      Char : Character := ' ';
      C    : Parsing_Contect (S'First);
 
      use type String_U.Unbounded_String;
   begin
 
      for I in S'Range loop
         Char := S (I);
 
         -- Update Comment meta info
         if (Char = '>' and C.Is_Inside_Comment and I > 2)
           and then (S (I - 1) = '-' and S (I - 2) = '-')
         then
            Display_Tag (S, I, C);
            C.Is_Inside_Comment  := False;
            C.Last_Content_Index := I + 1;
         end if;
 
         if (Char = '<') and not C.Is_Inside_Comment then
            C.Tag_Name          := Get_Tag_Name (S, I);
            C.Is_Inside_Comment := Is_Comment_Tag (C.Tag_Name);
            if (C.Is_Inside_Comment) then
               C.Last_Tag_Index := I + 1;
               Display_Content (S, I, C);
            end if;
         end if;
 
         -- Update Html meta info
         if (Char = '>' and C.Is_Inside_Tag) then
            Display_Tag (S, I, C);
            C.Is_Inside_Tag      := False;
            C.Last_Content_Index := I + 1;
         end if;
 
         if (Char = '<') and not C.Is_Inside_Comment then
 
            C.Tag_Name       := Get_Tag_Name (S, I);
            C.Is_Endding_Tag := Is_End_Tag (C.Tag_Name);
 
            if (C.Is_Inside_Script) then
               Process_Request_Script (S, I, C);
            else
               Display_Content (S, I, C);
               C.Last_Tag_Index := I + 1;
               Process_Request_All_Tag (S, I, C);
            end if;
 
         end if;
 
      end loop;
 
      Put_Line ("Result :");
      Tag_Stack.Print_All (C.Stack);
   end Process_Request_Internal;
 
   procedure Process_Request (S : String) is
   begin
      Put_Line ("Process_Request");
      Process_Request_Internal (S);
   end Process_Request;
 
   procedure Process_Request (S : String_U.Unbounded_String) is
   begin
      Put_Line ("Process_Request");
      Process_Request_Internal (Ada.Strings.Unbounded.To_String (S));
   end Process_Request;
 
   procedure Process_Request (C : Config.Config_Maps.Map; S : String) is
   begin
      Put_Line ("Process_Request");
      Cfg := C;
      Process_Request_Internal (S);
   end Process_Request;
 
   procedure Process_Request (C : Config.Config_Maps.Map; S : Ada.Strings.Unbounded.Unbounded_String) is
   begin
      Put_Line ("Process_Request");
      Cfg := C;
      Process_Request_Internal (Ada.Strings.Unbounded.To_String (S));
   end Process_Request;
 
end Html_Processor;
 
with Ada.Strings.Unbounded;
with Ada.Strings.Fixed;
with Ada.Containers.Hashed_Maps;
with Ada.Strings.Unbounded.Hash;
with Ada.Strings.Fixed.Hash;
 
package Config is
 
   package String_U renames Ada.Strings.Unbounded;
   package String_F renames Ada.Strings.Fixed;
   use type String_U.Unbounded_String;
 
   function Hashed (X : String_U.Unbounded_String) return Ada.Containers.Hash_Type renames
     Ada.Strings.Unbounded.Hash;
 
   package Config_Maps is new Ada.Containers.Hashed_Maps (
      Key_Type        => String_U.Unbounded_String,
      Element_Type    => String_U.Unbounded_String,
      Hash            => Hashed,
      Equivalent_Keys => "=");
 
 
 
   procedure Add_Param (Cfg : in out Config.Config_Maps.Map; Arg : String);
 
end Config;
 
with Ada.Containers;
with Ada.Text_IO;
 
package body Config is
 
   procedure Get_Key_Value (S : in String; key, value : out String_U.Unbounded_String) is
      Index : Natural;
   begin
      Index := String_F.Index (S, "=");
      if (Index = 0) then
         key   := String_U.To_Unbounded_String (S);
         value := String_U.To_Unbounded_String ("");
      else
         key   := String_U.To_Unbounded_String (S (S'First .. Index - 1));
         value := String_U.To_Unbounded_String (S (Index + 1 .. S'Last));
      end if;
   end Get_Key_Value;
 
   procedure Add_Param (Cfg : in out Config.Config_Maps.Map; Arg : in String) is
 
      key, value : String_U.Unbounded_String;
   begin
 
      Get_Key_Value (Arg, key, value);
      Ada.Text_IO.Put_Line ("key" & String_U.To_String (key));
      Config.Config_Maps.Insert (Container => Cfg, Key => key, New_Item => value);
 
   end Add_Param;
 
end Config;
 
with Ada.Containers.Vectors;
generic
   Size : Integer;
   type Elem is private;
 
   with procedure Print (E : in Elem);
 
package Stack is
   type Stack is limited private;
 
   subtype Count_Type is Integer range 0 .. Size;
   subtype Index_Type is Count_Type range 1 .. Size;
   package Stack_Vectors is new Ada.Containers.Vectors (Index_Type => Index_Type, Element_Type => Elem);
 
   procedure Push (S : in out Stack; E : in Elem);
   procedure Pop (S : in out Stack; E : out Elem);
   procedure Pop (S : in out Stack);
   procedure Pop_Until (S : in out Stack; E : in Elem);
   procedure Print_All (S : in Stack);
   function Length (S : in Stack) return Count_Type;
   OVERFLOW, UNDERFLOW : exception;
 
private
   type Stack is record
      Len : Count_Type := 0;
      V   : Stack_Vectors.Vector;
   end record;
end Stack;
 
with Ada.Text_IO; use Ada.Text_IO;
package body Stack is
   procedure Push (S : in out Stack; E : in Elem) is
   begin
      if S.Len = Size then
         raise OVERFLOW;
      end if;
      S.Len := S.Len + 1;
      Stack_Vectors.Append (S.V, E);
   end Push;
 
   procedure Pop (S : in out Stack; E : out Elem) is
   begin
      if S.Len = 0 then
         raise UNDERFLOW;
      end if;
 
      E := Stack_Vectors.Last_Element (S.V);
      Stack_Vectors.Delete_Last (S.V);
      S.Len := S.Len - 1;
   end Pop;
 
   -- pop until E is found
   procedure Pop_Until (S : in out Stack; E : in Elem) is
      Element : Elem;
 
      Nb_Deleted_Item : Ada.Containers.Count_Type := 0;
      Length          : Count_Type                := S.Len;
      Found           : Boolean                   := False;
      End_Of_Stack    : Boolean                   := False;
      Pop_Cursor      : Stack_Vectors.Cursor      := Stack_Vectors.Last (S.V);
      use Ada.Containers;
 
   begin
 
      while not Found and not End_Of_Stack loop
 
         if Length = 0 then
            End_Of_Stack := True;
         else
            Nb_Deleted_Item := Nb_Deleted_Item + 1;
            Length          := Length - 1;
            Element         := Stack_Vectors.Element (Pop_Cursor);
            if (E = Element) then
               Found := True;
            end if;
            Pop_Cursor := Stack_Vectors.Previous (Pop_Cursor);
         end if;
      end loop;
 
      if (Found) then
 
         Stack_Vectors.Delete_Last (S.V, Nb_Deleted_Item);
         S.Len := S.Len - Count_Type (Nb_Deleted_Item);
      else
         Put ("Pop_Until Error:");
         Print (E);
         Put ("  Stack :");
         Print_All (S => S);
         Put_Line ("");
      end if;
   end Pop_Until;
 
   procedure Pop (S : in out Stack) is
   begin
      if S.Len = 0 then
         raise UNDERFLOW;
      end if;
      Stack_Vectors.Delete_Last (S.V);
      S.Len := S.Len - 1;
   end Pop;
 
   procedure Print_All (S : in Stack) is
   begin
      if (S.Len > 0) then
         for I in 1 .. S.Len - 1 loop
            Print (Stack_Vectors.Element (S.V, I));
            Put (":");
         end loop;
         Print (Stack_Vectors.Element (S.V, S.Len));
      else
         Put ("Empty Stack");
      end if;
 
   end Print_All;
 
   function Length (S : in Stack) return Count_Type is
   begin
      return S.Len;
   end Length;
 
end Stack;