Einzelnen Beitrag anzeigen

Rolf Frei

Registriert seit: 19. Jun 2006
647 Beiträge
 
Delphi 11 Alexandria
 
#8

AW: RegExpression Hilfe - E-Mail prüfen

  Alt 20. Jun 2022, 16:19
Ach, warum ist das immer alles so kompliziert..

Dann nehme ich die Umlaute wieder raus, bis ich Lust habe, diese Kodierung einzubauen.

Ich danke Dir herzlich!

LG
Diese Codierung nennt sich Punycode. Hier habe ich dir eine Klasse, die du nutzen kannst. Habe nicht nachgeschaut ob es da in aktuellen Delphies schon sowas in den System Units gibt. Einfach vor dem Regex, deinen E-Mail per Punycode Encode codieren und dann diesen String überprüfen. Habe da zur Einfachheit noch eine Funktion gemacht (RF_PunyEncodeMailAddr), die du nutzen kannst, ohne das Objekt selber erstellen zu müssen. Ausserdem habe ich noch eine Funktion zum Üperprüfen der E-Mail angehängt (RF_ValidEmail), die ohne Regex auskommt und deutlich schneller ist.

Delphi-Quellcode:
  punycode_email := RF_PunyEncodeMailAddr('Joerg.Mueller@müller.de');
  if not RF_ValidEMail(punycode_email) then
    raise Exception.Create('E-Mail ungültig');
Bitte beachte, das Umlaute nur in der Domäne gültig sind und die Adresse davor keine Umlaute enthalten darf.


Delphi-Quellcode:
// Punycode Konvertierung
//
// Punycode: A Bootstring encoding of Unicode for Internationalized Domain Names in Applications (IDNA)
// http://www.rfc-editor.org/rfc/rfc3492.txt
//
// Delphi-Unit von Daniel Mitte (2005)
// Original-Code von http://www.activevb.de
//
// Beispiel:
// pc := TPunyClass.Create;
// e := pc.Encode('müller'); // Verschlüsselt 'müller' zu 'mller-kva'
// d := pc.Decode(e); // Entschlüsselt 'mller-kva' zu 'müller'
// pc.Free;

type
  TPunyClass = class
  private
    function GetMinCodePoint(const n: Longint; const data: WideString): Longint;
    function IsBasic(c: WideString; const n: Longint): Boolean;
    function Adapt(const delta, numpoints: Longint; const firsttime: Boolean): Longint;
    function Digit2Codepoint(const d: Longint): Longint;
    function Codepoint2Digit(const c: Longint): Longint;
    function UInt(i: Longint): Longint;
    function Asc(s: WideString): Longint;
    function AscW(s: WideString): Longint;
    function PosRev(sub, text: WideString): Longint;
  public
    function Encode(const input: WideString): WideString;
    function Decode(const input: WideString): WideString;
  end;


type
  TMyArrayOfString = array of string;

procedure GTL_StringExplode(var a: TMyArrayOfString; const Border, S: string);
var
  aStr: string;
  anIdx: Integer;
  aPos: integer;
begin
  anIdx := 0;
  aStr := S + Border;
  repeat
    SetLength(a, anIdx+1);
    aPos := Pos(Border, aStr);
    a[anIdx] := Copy(aStr, 0, aPos - 1);
    Delete(aStr, 1, Length(a[anIdx] + Border));
    Inc(anIdx);
  until aStr = '';
end;


function RF_PunyEncodeMailAddr(const AEMail: string): string;
var
  aStrings: TMyArrayOfString;
  aPunyStr: string;
  anIdx: integer;
begin
  Result := '';
  anIdx := Pos('@', AEMail);

  Result := Copy(AEMail, 1, anIdx-1);
  if anIdx > 0 then
    Result := Result +'@';
  aPunyStr := Copy(AEMail, anIdx+1, 255);

  GTL_StringExplode(aStrings, '.', aPunyStr);

  with TPunyClass.Create do
  try
    for anIdx := Low(aStrings) to High(aStrings) do
    begin
      aPunyStr := Encode(aStrings[anIdx]);
      if aPunyStr <> aStrings[anIdx] then
        aPunyStr := 'xn--' + aPunyStr;
      if anIdx > Low(aStrings) then
        aPunyStr := '.' + aPunyStr;
      Result := Result + aPunyStr;
    end;
  finally
    Free;
  end;
end;



const
  BASE: Longint = 36;
  TMIN: Longint = 1;
  TMAX: Longint = 26;
  SKEW: Longint = 38;
  DAMP: Longint = 700;
  INITIAL_BIAS: Longint = 72;
  INITIAL_N: Longint = 128;
  Delimiter: WideString = '-';
  MAX_INT: Longint = 2147483647;

function TPunyClass.Encode(const input: WideString): WideString;
var
  n, delta, bias, b, l, h, q, m, k, t: Longint;
  text, output, c: WideString;
  first: Boolean;

begin
  text := input;
  output := '';

  try
    n := INITIAL_N;
    bias := INITIAL_BIAS;
    b := 0;

    for l := 1 to Length(text) do
    begin
      c := Copy(text, l, 1);

      if IsBasic(c, INITIAL_N) = True then
      begin
        output := output + c;
        b := b + 1;
      end;
    end;

    if Length(output) < Length(text) then
      if Length(output) > 0 then output := output + Delimiter;

    h := b;
    delta := 0;

    while h < Length(text) do
    begin
      m := GetMinCodePoint(n, text);
      delta := delta + UInt(m - n) * (h + 1);

      n := m;

      for l := 1 to Length(text) do
      begin
        c := Copy(text, l, 1);
        if IsBasic(c, n) = True then delta := delta + 1
        else if UInt(AscW(c)) = n then
        begin
          q := delta;

          k := BASE;
          while k <= MAX_INT do
          begin
            if k <= (bias + TMIN) then t := TMIN
            else if k >= (bias + TMAX) then t := TMAX
            else t := k - bias;

            if q < t then break;

            output := output + Chr(Digit2Codepoint(t + ((q - t) Mod (BASE - t))));
            q := (q - t) div (BASE - t);

            k := k + BASE;
          end;

          output := output + Chr(Digit2Codepoint(q));
          first := False;
          if h = b then first := True;
          bias := Adapt(delta, h + 1, first);
          delta := 0;
          h := h + 1;
        end;
      end;

      delta := delta + 1;
      n := n + 1;
    end;
  except
    output := input;
  end;

  Result := output;
end;

function TPunyClass.Decode(const input: WideString): WideString;
var
  n, i, bias, l, ps, oldi, w, k, t: Longint;
  digit: Byte;
  text, output, c: WideString;
  first: Boolean;

begin
  text := input;
  output := '';

  try
    n := INITIAL_N;
    bias := INITIAL_BIAS;
    i := 0;

    ps := PosRev(Delimiter, text);

    if ps > 0 then
    begin
      for l := 1 to (ps - 1) do
      begin
        c := Copy(text, l, 1);

        if IsBasic(c, INITIAL_N) = True then output := output + c
        else
        begin
          Result := '';
          Exit;
        end;
      end;
    end;

    ps := ps + 1;

    while ps <= Length(text) do
    begin
      oldi := i;
      w := 1;

      k := BASE;
      while ((k <= MAX_INT) and (ps <= Length(text))) do
      begin
        c := Copy(text, ps, 1);
        ps := ps + 1;

        digit := Codepoint2Digit(Asc(c));
        if ((digit >= BASE) or (digit > ((MAX_INT - i) / w))) then
        begin
          Result := '';
          Exit;
        end;

        i := i + digit * w;

        if k <= bias then t := TMIN
        else if k >= (bias + TMAX) then t := TMAX
        else t := k - bias;

        if digit < t then break;

        if w > (maxint / (base - t)) then
        begin
          Result := '';
          Exit;
        end;

        w := w * (BASE - t);

        k := k + BASE;
      end;

      first := False;
      if oldi = 0 then first := True;
      bias := Adapt(i - oldi, Length(output) + 1, first);

      if (i / (Length(output) + 1)) > (MAX_INT - n) then
      begin
        Result := '';
        Exit;
      end;

      n := n + i div (Length(output) + 1);
      i := i mod (Length(output) + 1);

      if IsBasic(WideChar(n), INITIAL_N) = True then
      begin
        Result := '';
        Exit;
      end;

      output := Copy(output, 1, i) + WideChar(n) + Copy(output, i + 1, Length(output) - (i + 1) + 1);
      i := i + 1;
    end;
  except
    output := input;
  end;

  Result := output;
end;

function TPunyClass.GetMinCodePoint(const n: Longint; const data: WideString): Longint;
var
  t, a, res: Longint;

begin
  res := 2147483647;

  for t := 1 to Length(data) do
  begin
    a := UInt(AscW(Copy(data, t, 1)));
    if ((a >= n) and (a < res)) then res := a;
  end;

  Result := res;
end;

function TPunyClass.IsBasic(c: WideString; const n: Longint): Boolean;
begin
  Result := False;
  if UInt(AscW(c)) < n then Result := True;
end;

function TPunyClass.Adapt(const delta, numpoints: Longint; const firsttime: Boolean): Longint;
var
  k, dt: Longint;

begin
  dt := delta;

  if firsttime = True then dt := dt div DAMP
  else dt := dt div 2;

  dt := dt + (dt div numpoints);
  k := 0;

  while dt > (((BASE - TMIN) * TMAX) div 2) do
  begin
    dt := dt div (BASE - TMIN);
    k := k + BASE;
  end;

  Result := k + (((BASE - TMIN + 1) * dt) div (dt + SKEW));
end;

function TPunyClass.Digit2Codepoint(const d: Longint): Longint;
begin
  Result := 0;

  if d < 26 then Result := d + 97
  else if d < 36 then Result := d - 26 + 48;
end;

function TPunyClass.Codepoint2Digit(const c: Longint): Longint;
begin
  Result := BASE;

  if (c - 48) < 10 then Result := c - 22
  else if (c - 65) < 26 then Result := c - 65
  else if (c - 97) < 26 then Result := c - 97;
end;

function TPunyClass.UInt(i: Longint): Longint;
begin
  Result := i;
  if i < 0 then Result := 65536 + i;
end;

function TPunyClass.Asc(s: WideString): Longint;
var
  c: WideChar;

begin
  Result := 0;

  if Length(s) > 0 then
  begin
    c := s[1];
    Result := Word(c);
  end;
end;

function TPunyClass.AscW(s: WideString): Longint;
var
  c: WideChar;

begin
  Result := 0;

  if Length(s) > 0 then
  begin
    c := s[1];
    Result := Longint(c);
  end;
end;

function TPunyClass.PosRev(sub, text: WideString): Longint;
var
  p: Longint;
  s: WideString;

begin
  Result := 0;

  s := '';
  for p := 1 to Length(text) do s := s + Copy(text, Length(text) - p + 1, 1);

  p := Pos(sub, s);
  if p > 0 then Result := Length(s) - p + 1;
end;

// End of Punny Code
///////////////////////////////////


function RF_ValidEMail(AEMail: string): Boolean;
// Returns True if the email address is valid for RFC 2822
// Author: Ernesto D'Spirito / modified R.Frei
const
  // Valid characters in an "atom"
  atom_chars = ['A'..'Z', 'a'..'z', '0'..'9', '!', '#', '$', '%', '&', '''', '*', '+',
    '-', '/', '=', '?', '^', '_', '`', '(', '|', ')', '~'];

  // Valid characters in a "quoted-string"
  quoted_string_chars = [#0..#255] - ['"', #13, '\'];
  // Valid characters in a subdomain
  letters = ['A'..'Z', 'a'..'z'];
  letters_digits = ['0'..'9', 'A'..'Z', 'a'..'z'];
  subdomain_chars = ['-', '0'..'9', 'A'..'Z', 'a'..'z'];
type
  States = (STATE_BEGIN, STATE_ATOM, STATE_QTEXT, STATE_QCHAR,
    STATE_QUOTE, STATE_LOCAL_PERIOD, STATE_EXPECTING_SUBDOMAIN,
    STATE_SUBDOMAIN, STATE_HYPHEN);
var
  State: States;
  i, n, subdomains, LastSubDomSep: Integer;
  c: Char;
begin
  AEMail := RF_PunyEncodeMailAddr(AEMail);
  State := STATE_BEGIN;
  n := Length(AEMail);
  i := 1;
  LastSubDomSep := 0;
  subdomains := 1;
  while (i <= n) do
  begin
    c := AEMail[i];
    case State of
      STATE_BEGIN:
        if CharInSet(c, atom_chars) then
          State := STATE_ATOM
        else if c = '"then
          State := STATE_QTEXT
        else
          break;
      STATE_ATOM:
        if c = '@then
          State := STATE_EXPECTING_SUBDOMAIN
        else if c = '.then
          State := STATE_LOCAL_PERIOD
        else if not CharInSet(c, atom_chars) then
          break;
      STATE_QTEXT:
        if c = '\then
          State := STATE_QCHAR
        else if c = '"then
          State := STATE_QUOTE
        else if not CharInSet(c, quoted_string_chars) then
          break;
      STATE_QCHAR:
        State := STATE_QTEXT;
      STATE_QUOTE:
        if c = '@then
          State := STATE_EXPECTING_SUBDOMAIN
        else if c = '.then
          State := STATE_LOCAL_PERIOD
        else
          break;
      STATE_LOCAL_PERIOD:
        if CharInSet(c, atom_chars) then
          State := STATE_ATOM
        else if c = '"then
          State := STATE_QTEXT
        else
          break;
      STATE_EXPECTING_SUBDOMAIN:
        if CharInSet(c, letters_digits) then // rf. Orignal nur letters. 8488.ch wäre aber dann falsch?!
          State := STATE_SUBDOMAIN
        else
          break;
      STATE_SUBDOMAIN:
        if c = '.then
        begin
          Inc(subdomains);
          LastSubDomSep := i;
          State := STATE_EXPECTING_SUBDOMAIN
        end
        else if c = '-then
          State := STATE_HYPHEN
        else if not CharInSet(c, letters_digits) then
          break;
      STATE_HYPHEN:
        if CharInSet(c, letters_digits) then
          State := STATE_SUBDOMAIN
        else if c <> '-then
          break;
    end;
    Inc(i);
  end;
  if i <= n then
    Result := False
  else
    Result := (State = STATE_SUBDOMAIN) and (subdomains >= 2) and (n - LastSubDomSep >= 2);
end;

Geändert von Rolf Frei (20. Jun 2022 um 17:25 Uhr)
  Mit Zitat antworten Zitat